clear all

local startyear = 1999
local endyear = 2019 

local TitleIVdir ../Data/TitleIV/Files // ~/Research/StudentDebt/Data/TitleIV

***********************************
* Direct Loans
***********************************

	forvalues year = 1999/2019 {
		di "`year'"

		* Through 2005 the year's total activity were all reported in just one ``summary'' spreadsheet
		if inrange(`year',1999,2005) import excel using `TitleIVdir'/DL_AwardYr_Summary_AY`year'_`:di `year'+1'_All.xls, clear cellrange(A6) firstrow case(lower)
		* Starting 2006 there is a spreadsheet for each quarter and the totals for the year are in a separate sheet labeled ``Award Year Summary''
		* In some of the years, the exact location of the starting data jumps up or down by one row compared to the others...
		* In 2006 alone, everything is shifted up by one row compared to the other years
		if inrange(`year',2006,2006) import excel using `TitleIVdir'/DL_Dashboard_AY`year'_`:di `year'+1'_Q4.xls, clear sheet("Award Year Summary") cellrange(A5) firstrow case(lower) 
		if inrange(`year',2007,2016) import excel using `TitleIVdir'/DL_Dashboard_AY`year'_`:di `year'+1'_Q4.xls, clear sheet("Award Year Summary") cellrange(A6) firstrow case(lower) 
		if inrange(`year',2017,2017) import excel using `TitleIVdir'/DL_Dashboard_AY`year'_`:di `year'+1'_Q4.xls, clear sheet("Award Year Summary") cellrange(A7) firstrow case(lower) 
		if inrange(`year',2018,2019) import excel using `TitleIVdir'/dl-dashboard-ay`year'-`:di `year'+1'-q4.xls, clear sheet("Award Year Summary") cellrange(A7) firstrow case(lower) 

		* 1999 - 2004: DL subs (combined), DL unsubs (combined), DL PLUS (just undergrad - the PLUS program did not yet exist for grad students)
		if inrange(`year',1999,2004) {
			rename recipients DL_comb_subs_numreps
			rename ofloansoriginated DL_comb_subs_numloans
			rename h DL_comb_subs_volloans
			rename ofdisbursements DL_comb_subs_numdisb
			rename j DL_comb_subs_voldisb
			rename k DL_comb_unsubs_numreps
			rename l DL_comb_unsubs_numloans
			rename m DL_comb_unsubs_volloans
			rename n DL_comb_unsubs_numdisb
			rename o DL_comb_unsubs_voldisb
			rename p DL_ugrad_plus_numreps
			rename q DL_ugrad_plus_numloans
			rename r DL_ugrad_plus_volloans
			rename s DL_ugrad_plus_numdisb
			rename t DL_ugrad_plus_voldisb
		}

		* As of 7/1/2006, the PLUS program was opened up to graduate students, so there is separate reporting for PLUS loans by undergrad and grad starting in 2005-2006, although hardly any grad PLUS loans had been originated yet
		if inrange(`year',2005,2009) {
			rename recipients DL_comb_subs_numreps
			rename ofloansoriginated DL_comb_subs_numloans
			rename h DL_comb_subs_volloans
			rename ofdisbursements DL_comb_subs_numdisb
			rename j DL_comb_subs_voldisb
			rename k DL_comb_unsubs_numreps
			rename l DL_comb_unsubs_numloans
			rename m DL_comb_unsubs_volloans
			rename n DL_comb_unsubs_numdisb
			rename o DL_comb_unsubs_voldisb
			rename p DL_ugrad_plus_numreps
			rename q DL_ugrad_plus_numloans
			rename r DL_ugrad_plus_volloans
			rename s DL_ugrad_plus_numdisb
			rename t DL_ugrad_plus_voldisb
			rename u DL_grad_plus_numreps
			rename v DL_grad_plus_numloans
			rename w DL_grad_plus_volloans
			rename x DL_grad_plus_numdisb
			rename y DL_grad_plus_voldisb
		}

		* In 2010, the FL program was discontinued and all loans became direct loans. At the same time, the DL data begin reporting DL subs and unsubs loans separately for grad and undergrad
		if inrange(`year',2010,2011) {
			rename recipients DL_ugrad_subs_numreps
			rename ofloansoriginated DL_ugrad_subs_numloans
			rename h DL_ugrad_subs_volloans
			rename ofdisbursements DL_ugrad_subs_numdisb
			rename j DL_ugrad_subs_voldisb
			rename k DL_grad_subs_numreps
			rename l DL_grad_subs_numloans
			rename m DL_grad_subs_volloans
			rename n DL_grad_subs_numdisb
			rename o DL_grad_subs_voldisb
			rename p DL_ugrad_unsubs_numreps
			rename q DL_ugrad_unsubs_numloans
			rename r DL_ugrad_unsubs_volloans
			rename s DL_ugrad_unsubs_numdisb
			rename t DL_ugrad_unsubs_voldisb
			rename u DL_grad_unsubs_numreps
			rename v DL_grad_unsubs_numloans
			rename w DL_grad_unsubs_volloans
			rename x DL_grad_unsubs_numdisb
			rename y DL_grad_unsubs_voldisb
			rename z DL_ugrad_plus_numreps
			rename aa DL_ugrad_plus_numloans
			rename ab DL_ugrad_plus_volloans
			rename ac DL_ugrad_plus_numdisb
			rename ad DL_ugrad_plus_voldisb
			rename ae DL_grad_plus_numreps
			rename af DL_grad_plus_numloans
			rename ag DL_grad_plus_volloans
			rename ah DL_grad_plus_numdisb
			rename ai DL_grad_plus_voldisb
		}

		* In 2012, graduate students were no longer allowed to borrow subsidized DLs, so the data just report one "subsidized" category, but this means undergraduate.
		if inrange(`year',2012,2019) {
			rename recipients DL_ugrad_subs_numreps
			rename ofloansoriginated DL_ugrad_subs_numloans
			rename h DL_ugrad_subs_volloans
			rename ofdisbursements DL_ugrad_subs_numdisb
			rename j DL_ugrad_subs_voldisb
			rename k DL_ugrad_unsubs_numreps
			rename l DL_ugrad_unsubs_numloans
			rename m DL_ugrad_unsubs_volloans
			rename n DL_ugrad_unsubs_numdisb
			rename o DL_ugrad_unsubs_voldisb
			rename p DL_grad_unsubs_numreps
			rename q DL_grad_unsubs_numloans
			rename r DL_grad_unsubs_volloans
			rename s DL_grad_unsubs_numdisb
			rename t DL_grad_unsubs_voldisb
			rename u DL_ugrad_plus_numreps
			rename v DL_ugrad_plus_numloans
			rename w DL_ugrad_plus_volloans
			rename x DL_ugrad_plus_numdisb
			rename y DL_ugrad_plus_voldisb
			rename z DL_grad_plus_numreps
			rename aa DL_grad_plus_numloans
			rename ab DL_grad_plus_volloans
			rename ac DL_grad_plus_numdisb
			rename ad DL_grad_plus_voldisb
		}

		* Process and save the numbers
		foreach numericvar of varlist DL_* {
			di "`numericvar'"
			capture confirm numeric variable `numericvar'
			if _rc ~= 0 {
				replace `numericvar' = subinstr(`numericvar',"$","",.)
				replace `numericvar' = subinstr(`numericvar',",","",.)
				g negative_flag = 1 if regexm(`numericvar',"\([0-9\.]*\)") | regexm(`numericvar',"-[0-9\.]*")
				replace `numericvar' = subinstr(`numericvar',"-","",.)
				replace `numericvar' = subinstr(`numericvar',"(","",.)
				replace `numericvar' = subinstr(`numericvar',")","",.)
				destring `numericvar', replace
				replace `numericvar' = -`numericvar' if negative_flag == 1
				drop negative_flag
			}
		}
		* some years include schools from the UK, which have non-numeric "zip codes"
		tostring zipcode, replace 
		g awardyear = `year'
		* OPEID is actually an 8-digit string but sometimes gets input as a number 
		tostring opeid, replace
		* Some years read in blank rows at the end of the spreadsheet:
		drop if opeid == ""
		replace opeid = "0" + opeid if length(opeid) < 8
		replace opeid = "0" + opeid if length(opeid) < 8

		isid opeid
		order opeid awardyear

		tempfile DL_AY`year'
		save `DL_AY`year''

	}
	clear
	forvalues year = 1999/2019 {
		append using `DL_AY`year''
	}
	tempfile DL_annual
	save `DL_annual'

***********************************
* FFEL loans (aka FL in some years)
***********************************

	forvalues year = 1999/2009 {

		* Through 2005 the year's total activity were all reported in just one ``summary'' spreadsheet
		if inrange(`year',1999,2005) import excel using `TitleIVdir'/FFEL_AwardYr_Summary_AY`year'_`:di `year'+1'_All.xls, clear cellrange(A6) firstrow case(lower)
		* Starting 2006 there is a spreadsheet for each quarter and the totals for the year are in a separate sheet labeled ``Award Year Summary''
		* In some of the years, the exact location of the starting data jumps up or down by one row compared to the others...
		* In 2006 alone, everything is shifted up by one row compared to the other years
		if inrange(`year',2006,2006) import excel using `TitleIVdir'/FL_Dashboard_AY`year'_`:di `year'+1'_Q4.xls, clear sheet("Award Year Summary") cellrange(A5) firstrow case(lower) 
		if inrange(`year',2007,.) import excel using `TitleIVdir'/FL_Dashboard_AY`year'_`:di `year'+1'_Q4.xls, clear sheet("Award Year Summary") cellrange(A6) firstrow case(lower) 
		* Other than plus loans, FL loans were never separated by grad and undergrad in our spreadsheets
		* Subsidized
			rename recipients FL_comb_subs_numreps
			rename ofloansoriginated FL_comb_subs_numloans
			rename h FL_comb_subs_volloans
			rename ofdisbursements FL_comb_subs_numdisb
			rename j FL_comb_subs_voldisb
		* Unsubsidized
			rename k FL_comb_unsubs_numreps
			rename l FL_comb_unsubs_numloans
			rename m FL_comb_unsubs_volloans
			rename n FL_comb_unsubs_numdisb
			rename o FL_comb_unsubs_voldisb
		* Undergraduate parent plus
			rename p FL_ugrad_plus_numreps
			rename q FL_ugrad_plus_numloans
			rename r FL_ugrad_plus_volloans
			rename s FL_ugrad_plus_numdisb
			rename t FL_ugrad_plus_voldisb
		* Graduate plus loans: Only existed from 2005 onward
		if `year' >= 2005 {
			rename u FL_grad_plus_numreps
			rename v FL_grad_plus_numloans
			rename w FL_grad_plus_volloans
			rename x FL_grad_plus_numdisb
			rename y FL_grad_plus_voldisb
		}
		foreach numericvar of varlist FL_* {
			capture confirm numeric variable `numericvar'
			if _rc ~= 0 {
				replace `numericvar' = subinstr(`numericvar',"$","",.)
				replace `numericvar' = subinstr(`numericvar',"-","",.)
				replace `numericvar' = subinstr(`numericvar',",","",.)
				g negative_flag = 1 if regexm(`numericvar',"\([0-9\.]*\)")
				replace `numericvar' = subinstr(`numericvar',"(","",.)
				replace `numericvar' = subinstr(`numericvar',")","",.)
				destring `numericvar', replace
				replace `numericvar' = -`numericvar' if negative_flag == 1
				drop negative_flag
			}
		}
		* some years include schools from the UK, which have non-numeric "zip codes"
		tostring zipcode, replace 
		g awardyear = `year'
		* OPEID is actually an 8-digit string but sometimes gets input as a number 
		tostring opeid, replace
		* Some years read in blank rows at the end of the spreadsheet:
		drop if opeid == ""
		replace opeid = "0" + opeid if length(opeid) < 8
		replace opeid = "0" + opeid if length(opeid) < 8

		isid opeid
		order opeid awardyear

		tempfile FL_AY`year'
		save `FL_AY`year''
	}

	clear
	
	forvalues year = 1999/2009 {
		append using `FL_AY`year''
	}
	
	isid opeid awardyear

* Merge annual datasets
	merge 1:1 opeid awardyear using `DL_annual', nogen update 
	save ../Data/TitleIV/TitleIV.dta, replace
